/* 
    Purpose: Using the 1970 Census (1% sample), this file locates black and white women aged 
             30-50 who head their household, report non-zero income, and are mothers of a child 
             younger than 18 in the same household. Other variables necessary to create average 
             predicted mother income (in 4d) are also cleaned.

    Note: Income was asked of all individuals 14+ in the 1970 Census, 
          so there's no need to use a sample line weight (or any weight). 

    Creates: Census1970_mothers_ages30to50.dta
*/
* Start from an empty dataset and turn off output paging so the do-file runs unattended
clear
set more off
* Work relative to the Census data folder; the global macro Mydirectory1 must be defined before this file is run
cd "$Mydirectory1/1_DataSources/CensusData/"

    * Load the raw person-level extract
    use ./input/Census1970_1pct_raw.dta, clear //download from IPUMS USA
        * Inspect the distribution of the person weight
        tab perwt //Confirmed: everyone receives a weight of 1.
            
*------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------*

***************************
** SET UP INCOME VARIABLES
***************************

* Person-level total income: treat 9999999 as missing and floor negative amounts at zero
    replace inctot = . if inctot==9999999
    replace inctot = 0 if inctot<0

* Family income, version 1 (the Census-provided ftotinc), cleaned the same way
    replace ftotinc = . if ftotinc==9999999
    replace ftotinc = 0 if ftotinc<0

* Family income, version 2: add up members' individual incomes within each serial-famunit
    bysort serial famunit: egen fam_income = total(inctot)

* How often is v1 missing while v2 is positive?
    /*Note: Discrepancies appear to come 
            mostly from individuals living 
            in group quarters. */
    count if fam_income>0 & ftotinc==.

* Harmonize: fill a missing v1 with the positive v2
    replace ftotinc = fam_income if fam_income>0 & ftotinc==.

* Household income

    //Flag the first-listed person (lowest pernum) in each family unit
    bysort serial famunit (pernum): gen fam_head = _n==1

    //Carry the family's income on that one person only; everyone else, and missing incomes, contribute zero
    gen temp = cond(fam_head==1 & ftotinc!=., ftotinc, 0)

    //Sum the family incomes of the "separate" families sharing a serial to get household income
    bysort serial: egen hh_income = total(temp)
    drop fam_income fam_head temp

    //From here on, fam_income refers to the harmonized Census family income
    rename ftotinc fam_income

/*
   Deflate every income measure from 1970 dollars to 1950 dollars using the CPI.
   Source: https://www.minneapolisfed.org/about-us/monetary-policy/inflation-calculator/consumer-price-index-1913-
*/
    gen CPI1950 = 24.1
    gen CPI1970 = 38.8

    //Multiply by the 1950-to-1970 price ratio
    foreach incvar in inctot fam_income hh_income {
        replace `incvar' = `incvar' * (CPI1950 / CPI1970)
    }
  
* Build the list of mothers from the full person file, BEFORE the income restriction below
    /* Why this comes first: the income restriction drops every person with zero or missing
       inctot. Income was only asked of individuals 14+, so applying it first would remove
       nearly all children, and mothers would then be found only through children who
       report their own income. */
    preserve
        keep if age<18 //Restrict to children younger than 18
        keep serial momloc

        drop if momloc==0 | momloc==. //Exclude children without a mother in the house

        bysort serial momloc: keep if _n==1 //Keep all unique mother ids. Some mothers will have multiple children in the Census.
        rename momloc pernum //momloc becomes the person key used to match the mother's own record

        tempfile children
        save `children'
    restore

* Keep respondents with non-zero and non-missing income
    foreach var of varlist inctot fam_income hh_income {
        drop if `var'==0 | `var'==.
    }

    tempfile fulldata
    save `fulldata'

*------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------*

***************************
** IDENTIFY MOTHERS
***************************

* Keep the sample of mothers: income-restricted persons matched to at least one co-resident child younger than 18
    use `fulldata', clear
    merge 1:1 serial pernum using `children'
    keep if _merge==3
    drop _merge

* Restrict to mothers who are heads of household (relate==1); tabulate first to see what is being dropped
    tab relate
    keep if relate==1

*------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------*

***************************
** CREATE OTHER NECESSARY VARIABLES
***************************

* Restrict to black and white mothers ages 30 to 50
    keep if age>=30 & age<=50
    keep if inlist(race,1,2)

    //State of residence, renamed to fips for the region coding that follows
    tab statefip, m
    rename statefip fips
    
* Region of current residence, rebuilt from state FIPS codes (the existing region variable is discarded)
    drop region

    /* State lists behind each code:
         1 Northeast: Connecticut, Maine, Massachusetts, New Hampshire, Rhode Island, Vermont,
                      New Jersey, New York, Pennsylvania
         2 Midwest:   Illinois, Indiana, Michigan, Ohio, Wisconsin, Iowa, Kansas, Minnesota,
                      Missouri, Nebraska, North Dakota, South Dakota
         3 South:     Delaware, District of Columbia, Florida, Georgia, Maryland, North Carolina,
                      South Carolina, Virginia, West Virginia, Alabama, Kentucky, Mississippi,
                      Tennessee, Arkansas, Louisiana, Oklahoma, Texas
         4 West:      Arizona, Colorado, Idaho, Montana, Nevada, New Mexico, Utah, Wyoming,
                      California, Oregon, Washington, plus Alaska and Hawaii (Census puts AK and
                      HI in with the Pacific division) */

    gen region_merge=.
    replace region_merge=1 if inlist(fips,9,23,25,33,44,50,34,36,42)
    replace region_merge=2 if inlist(fips,17,18,26,39,55,19,20,27,29,31,38,46)
    replace region_merge=3 if inlist(fips,10,11,12,13,24,37,45,51,54) | inlist(fips,1,21,28,47,5,22,40,48)
    replace region_merge=4 if inlist(fips,4,8,16,30,32,35,49,56,6,41,53,2,15)
    tab region_merge, m //any missing here is a fips value not covered by the lists above

    label define region_l 1 "NORTHEAST" 2 "MIDWEST" 3 "SOUTH" 4 "WEST"
    label values region_merge region_l
    tab region_merge, m

    //Indicator for residence in the South
    gen south_merge = region_merge==3
  
* Education variable: five categories built from the detailed education code educd
    /* The upper bound of the top category is tested on educd itself. Testing educ instead
       either relies on variable abbreviation (when only educd is in the extract) or, when
       both variables are present, lets the missing code educd==999 fall into ">hs". */

    gen edu=.
    replace edu=1 if educd<=25 //<grade school (includes people with no schooling)
    replace edu=2 if educd==26 //8th grade
    replace edu=3 if inlist(educd,30,40,50,61) //<hs
    replace edu=4 if inlist(educd,60,62,63,64) //hs
    replace edu=5 if educd>64 & educd<999 //>hs. "999" is missing, so it is left unassigned
    tab edu, m
        
*------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------*

****************
**** ASSIGN COARSENED OCCS
****************

* Prepare occ1950: codes of 980 and above are set to missing
    sort occ1950
    replace occ1950 = . if occ1950>=980

* Count # of distinct occ1950 values in the 1970 data (nvals flags the first row of each value)
    bysort occ1950: gen nvals = (_n==1)
    count if nvals==1

* Occupations coded 200-290: shift the code by 1000 for the self-employed (classwkr==1) so they are crosswalked separately
    replace occ1950 = occ1950 + 1000 if inrange(occ1950,200,290) & classwkr==1

* Crosswalk Census occupations to coarsened ANES occupations
    merge m:1 occ1950 using ../Crosswalks/Crosswalk_1950Census_toANES.dta
    assert _merge!=1 //every observation in memory must find a crosswalk row
    keep if _merge==3 //discard crosswalk rows that no one in the data holds
    drop _merge

    //occ1950ej is presumably supplied by the crosswalk file; list and drop observations left without a coarsened code
    tab occ1950 if occ1950ej==., m
    keep if occ1950ej!=.
            
*------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------*

****************
**** SAVE 
****************

* Final variable list
    rename occ1950ej motheroccej

    keep perwt race south_merge motheroccej inctot fam_income hh_income

    //Marker identifying these rows as Census observations
    gen census=1
    label var census "Census obs"

    //Label each income measure and add its natural log
    foreach inc in inctot fam_income hh_income {
        label var `inc' "`inc', Census"
        gen log_mother_`inc' = ln(`inc')
        label var log_mother_`inc' "Log `inc', Census"
    }

    //Final names for the family and household income levels
    rename fam_income faminc
    rename hh_income HHinc

    compress
    save ./output/Census1970_mothers_ages30to50.dta, replace
                    
    
